## Regression Analysis: Employment Share of Large Retailer

#-----------------------------------------------------------#
# User settings
## Cross Employer Wage Elasticity Estimates

# CHOOSE the sample; this string is pasted into the input file name when the
# dataset is read.
sample_choice <- "10_percent_sample"

# CHOOSE the balance requirement of the panel.
# Documented choices: "unbalanced", "pre_post_1_t" or "pre_post_3_t".
suffix_mild_balanced <- "pre_post_3_t"

# Directory (inside the current working directory) that receives the .tex
# tables. The trailing slash is part of the stored value.
output_tables_dir <- file.path(getwd(), "output/")

#-----------------------------------------------------------#
# Load the dataset selected by sample_choice / suffix_mild_balanced and, in the
# same pipeline, add:
#   super_gap_std, T_std - super_gap and T passed through scale() (centered and
#                          divided by their standard deviation), stored as
#                          plain vectors
#   cz                   - copy of czone, used as a merge key below
# Note: inside mutate() `T` refers to the data column named T, not to TRUE.
df <- readRDS(
  paste0(
    "data/derived/mild_balanced_", suffix_mild_balanced,
    "_stacked_nonpolicy_firm_payroll_provider_dataset_", sample_choice, ".rds"
  )
) %>%
  mutate(
    super_gap_std = as.vector(scale(super_gap)),
    T_std = as.vector(scale(T)),
    cz = czone
  )

# Load the stacked policy-firm wages data
stacked_policy_firm_wages_data <- read.csv("data/credit_bureau/stacked_policy_firm_wages_dataset.csv")

# Merge the policy-firm wages onto df, keeping every row of df.
# NOTE(review): presumably the wages data has one row per
# (cz, etime, trt_exp); if not, this join duplicates rows of df - confirm.
df <- df %>%
  left_join(stacked_policy_firm_wages_data, by = c("cz", "etime", "trt_exp"))

#-----------------------------------------------------------#
# Prep Regression Analysis
#
# Adds three 0/1 indicator columns to df:
#   post_period24            - 1 when etime > -1, 0 otherwise
#   post_period12            - 1 for etime in [0, 6], 0 for etime in [-6, -1],
#                              NA for every other event time
#   emp_share_q_all_greater6 - 1 when employment_share_q_all_decile > 6,
#                              0 otherwise
df <- df %>%
  mutate(
    post_period24 = ifelse(etime > -1, 1, 0),
    post_period12 = case_when(
      etime >= -6 & etime <= -1 ~ 0,
      etime >= 0 & etime <= 6 ~ 1,
      TRUE ~ NA_real_
    ),
    emp_share_q_all_greater6 = ifelse(employment_share_q_all_decile > 6, 1, 0)
  )

#-----------------------------------------------------------#
# Variable Definitions
# Each value is a column name that the regression loop pastes into a model
# formula and into the output file name.

# Dependent variable of the first model
policy_y <- "ln_wage_policy"

# Dependent variable of the second model
y <- "ln_avg_wage_exact"

# Treatment variable of interest (alternative: "super_gap_std").
# The loop derives the unstandardized name by dropping the last 4 characters.
t <- "T_std"

# Employment-share indicator(s) the loop iterates over
#employment_share_var = c("emp_share_greater6", "emp_share_q_all_greater6")
employment_share_var <- "emp_share_q_all_greater6"

# Post-period indicator(s) the loop iterates over
period_var <- "post_period12"

#-----------------------------------------------------------#
## Regression Analysis
#
# For every combination of post-period indicator (period_var) and
# employment-share indicator (employment_share_var), estimate two models with
# fixest::feols and write them side by side into one LaTeX table:
#   (a) policy_y ~ t * p
#   (b) y        ~ t * p * e
# Both models absorb clt_client^trt_exp, czone^trt_exp and etime^trt_exp fixed
# effects and cluster the standard errors by czone.

# Name of the unstandardized treatment variable: t without its 4-character
# "_std" suffix (e.g. "T_std" -> "T").
unst_t <- substr(t, start = 1, stop = nchar(t) - 4)

# Display names fixest uses for variables in exported tables.
dict <- c(ln_avg_wage_exact = "Log Average Wage",
          T_std = "Large Retailer Gap (std.)",
          #post_period = "Post",
          clt_client = "Firm",
          czone = "CZ",
          etime = "Event time",
          trt_exp = "")
fixest::setFixest_dict(dict)

# LaTeX styling shared by every table; it does not depend on p or e, so it is
# built once instead of on every iteration.
table_style <- fixest::style.tex(main = "aer",
                                 model.format = "",
                                 #model.title = "\\textit{OLS Dep. var: Cross Wage Elasticity}",
                                 yesNo = "Y",
                                 depvar.title = "Dep. var:",
                                 depvar.style = "*",
                                 fixef.title = "\\midrule",
                                 fixef.suffix = "FEs",
                                 fixef.where = "var",
                                 line.top = "double")

# Fixed-effects part of the formula, identical for both models.
fixef_rhs <- "clt_client^trt_exp + czone^trt_exp + etime^trt_exp"

# Make sure the target directory exists before etable() writes into it.
dir.create(output_tables_dir, showWarnings = FALSE, recursive = TRUE)

for (p in period_var) {
  for (e in employment_share_var) {
    # Formulas are assembled from strings so the dependent variable, the
    # period indicator and the employment-share indicator can vary.
    formula1a <- as.formula(paste(policy_y, "~", t, "*", p, "|", fixef_rhs))
    formula1b <- as.formula(paste(y, "~", t, "*", p, "*", e, "|", fixef_rhs))

    fit1a <- fixest::feols(formula1a, cluster = ~czone, data = df)
    fit1b <- fixest::feols(formula1b, cluster = ~czone, data = df)

    # Rows of df that each model actually used. All descriptive rows added to
    # the table are computed on these, so they describe the same sample as the
    # reported number of observations.
    est_sample_a <- df[fixest::obs(fit1a), ]
    est_sample_b <- df[fixest::obs(fit1b), ]

    # Standard deviation of the unstandardized treatment variable, per model.
    sd_unst_ta <- sd(est_sample_a[[unst_t]])
    sd_unst_tb <- sd(est_sample_b[[unst_t]])

    # Distinct commuting zones / firms per model, in column order (a, b).
    cz_count <- c(n_distinct(est_sample_a[["czone"]]),
                  n_distinct(est_sample_b[["czone"]]))
    clt_client_count <- c(n_distinct(est_sample_a[["clt_client"]]),
                          n_distinct(est_sample_b[["clt_client"]]))

    # Output customized TEX table.
    # NOTE(review): the "10_percent" part of the file name is hard-coded and
    # does not follow sample_choice, and y/t/p/e are concatenated without
    # separators. Left unchanged because other documents may reference the
    # existing file name - confirm before renaming.
    fixest::etable(fit1a, fit1b,
                   tex = TRUE,
                   style.tex = table_style,
                   depvar = FALSE, # remove dependent variable title from the top part of the table
                   headers = c("Log Average Policy Wage", "Log Average Wage Exact"),
                   fitstat = ~ n + r2,
                   file = file.path(
                     output_tables_dir,
                     paste0("emp_share_OLS_10_percent", y, "_treat_var_", t, p, e, ".tex")
                   ),
                   replace = TRUE,
                   extralines = list("CZ's" = cz_count,
                                     "Number of Firms" = clt_client_count,
                                     "SD treat. var. (unstd.)" = c(sd_unst_ta, sd_unst_tb)))
  }
}